# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 05_balance.R
### This script creates balance tables for students and villages.
# ----
# Balance table (students) ----
# Baseline covariates checked for balance. One row per covariate:
#   dv    - column name of the covariate in the data
#   label - row label shown in the balance table
#   type  - thematic group of the covariate
# The table is written row-wise as a character matrix and then converted to a
# plain data.frame with character columns.
balance_predictors <- as.data.frame(
  matrix(
    c(
      "gender_num", "Gender - Girl", "Demographic",
      "class", "Grade", "Demographic",
      "age", "Age", "Demographic",
      "religion_hindu_num", "Religion - Hindu", "Demographic",
      # NOTE(review): column is spelled `language_hindu_num` but labelled
      # "Hindi" - confirm the variable name against the data-cleaning script.
      "language_hindu_num", "Language - Hindi", "Demographic",
      "caste_gen", "Caste - GEN", "Demographic",
      "caste_obc", "Caste - OBC/EBC", "Demographic",
      "caste_sc", "Caste - SC", "Demographic",
      "caste_st", "Caste - ST", "Demographic",
      "asset_index", "Asset Index", "Demographic",
      "fathers_education", "Father's Education", "Education",
      "mothers_education", "Mother's Education", "Education",
      "school_gov_num", "Government School", "Education",
      "science", "Science Knowledge", "Education",
      "mobile_internet_num", "Mobile Internet", "Demographic",
      "trust_newspapers_num", "Trust Newspapers", "Trust",
      "trust_social_media_num", "Trust Social Media", "Trust",
      "trust_tv_num", "Trust TV", "Trust",
      "trust_friends_family_num", "Trust Friends and Family", "Trust",
      "vaccinated_num", "Trust Vaccinated", "Trust",
      "ayurveda_effective_num", "Trust Ayurveda", "Trust"
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("dv", "label", "type"))
  ),
  stringsAsFactors = FALSE
)

# Regression-based balance check for one covariate, with arm-level means.
#
# Fits `dv ~ iv + library_spillover_pre` with svyglm() on `design` and computes
# the svymean() of `dv` within the treatment and the control arm (observations
# with a missing `dv` are dropped before averaging).
#
# Args:
#   dv:            name (string) of the covariate used as the outcome.
#   iv:            name (string) of the assignment variable.
#   design:        survey design object passed to svyglm() / svymean().
#   treat_level:   value of `iv` that identifies the treatment arm.
#   control_level: value of `iv` that identifies the control arm.
#
# Returns: the tidy() coefficient table of the model (one row per term) with
#   `dv`, `iv`, `n` (nobs() of the model), `mean_treatment` and `mean_control`
#   inserted before `term`.
tab_regression_balance_with_means <- function(dv, iv, design,
                                              treat_level = "Media Literacy",
                                              control_level = "Spoken English") {
  fml <- as.formula(paste(dv, "~", iv, "+ library_spillover_pre"))
  mod <- svyglm(fml, design = design)

  # Mean of `dv` among non-missing observations in one arm of `iv`.
  # get() turns the column-name strings into the design's columns inside
  # subset(); `[1]` keeps the first element of the svymean() estimate.
  arm_mean <- function(arm_level) {
    arm_design <- subset(design, get(iv) == arm_level & !is.na(get(dv)))
    coef(svymean(as.formula(paste("~", dv)), arm_design))[1]
  }
  mean_treat <- arm_mean(treat_level)
  mean_control <- arm_mean(control_level)

  # Regression results plus identifying columns and arm means
  tidy(mod) %>%
    mutate(
      dv = dv,
      iv = iv,
      n = nobs(mod),
      mean_treatment = mean_treat,
      mean_control = mean_control,
      .before = term
    )
}

# Run the balance regression for every covariate in `balance_predictors`.
# The caste indicator columns are added to the survey design once, up front,
# instead of being rebuilt inside the loop for each covariate.
regressions_balance <- bimli_svy_dkr.rm %>%
  mutate(
    caste_gen = ifelse(caste == "GEN", 1, 0),
    caste_obc = ifelse(caste == "OBC/EBC", 1, 0),
    caste_sc = ifelse(caste == "SC", 1, 0),
    caste_st = ifelse(caste == "ST", 1, 0)
  ) %>%
  {
    # braces stop the pipe from inserting the design as lapply()'s first argument
    lapply(
      balance_predictors$dv,
      tab_regression_balance_with_means,
      iv = "treatment",
      design = .
    )
  } %>%
  bind_rows() %>%
  # normal-approximation 95% confidence bounds
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  ) %>%
  # drop intercepts and the library_spillover_pre control terms
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term)) %>%
  # Benjamini-Hochberg adjustment over the remaining rows; `n` is set
  # explicitly to the number of rows, so rows with a missing p-value still
  # count as comparisons
  mutate(p.value_bh = p.adjust(p.value, method = "BH", n = length(p.value)))

# Merge regression results with balance predictors and recode labels.
# When a coefficient name contains the covariate name plus a suffix, the suffix
# is appended to the label; otherwise the plain label is used. The covariate
# name is matched as a fixed string (not a regex), vectorised over rows.
# NOTE(review): `term` comes from models of the form dv ~ iv + control, so
# after intercept/control terms are filtered out it holds the `iv` coefficient
# name and the first branch does not appear to fire - confirm whether it is
# still needed.
regressions_balance <- left_join(balance_predictors, regressions_balance, by = "dv") %>%
  as_tibble() %>%
  mutate(
    label_rec = case_when(
      stringr::str_detect(term, stringr::fixed(dv)) & nchar(term) > nchar(dv) ~
        paste(label, "-", stringr::str_replace(term, stringr::fixed(dv), "")),
      TRUE ~ label
    )
  )

# Format the balance results and write them to output/tables/tab_balance.tex.
regressions_balance %>%
  mutate(
    n = format(n, big.mark = ","),
    across(c(mean_treatment, mean_control), \(avg) round(avg, 2)),
    estimate = format(round(estimate, 2), nsmall = 2),
    std.error = round(std.error, 3),
    # Raw and FDR-adjusted p-values get the same treatment: a display string
    # (bounded at >0.9 and <0.001) followed by significance stars.
    across(
      c(p.value, p.value_bh),
      \(p) str_c(
        case_when(
          p > 0.9 ~ "$>$0.9",
          p < 0.001 ~ "$<$0.001",
          p < 0.01 ~ as.character(round(p, 3)),
          TRUE ~ as.character(round(p, 2))
        ),
        case_when(
          p < 0.001 ~ "***",
          p < 0.01 ~ "**",
          p < 0.05 ~ "*",
          TRUE ~ ""
        )
      )
    )
  ) %>%
  # keep the table columns, in display order, under their printed headers
  select(
    ` ` = label_rec,
    N = n,
    Treatment = mean_treatment,
    Control = mean_control,
    `Diff.` = estimate,
    SE = std.error,
    p = p.value,
    `p (FDR)` = p.value_bh
  ) %>%
  xtable::xtable(
    caption = "Balance between treatment and control participants",
    label = "tab:balance_means",
    align = "lllllllll",
    digits = 3
  ) %>%
  print(
    file = "output/tables/tab_balance.tex",
    include.rownames = FALSE,
    caption.placement = "top",
    table.placement = getOption("xtable.table.placement", "h!"),
    scalebox = "0.8",
    comment = FALSE,
    # cell text is passed through unescaped so the LaTeX math in the p-value
    # columns is kept as written
    sanitize.text.function = identity,
    # bold column headers
    sanitize.colnames.function = function(x) str_c("\\textbf{", x, "}"),
    # horizontal rules above and below the header row
    hline.after = c(-1, 0),
    # two footnote rows appended after the last table row
    add.to.row = list(
      pos = list(nrow(.), nrow(.)),
      command = c(
        "\\hline\n\\multicolumn{8}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover FEs.} \\\\\n",
        "\\multicolumn{8}{l}{\\footnotesize Last column reports p-values adjusted for the False Discovery Rate (FDR).} \\\\\n"
      )
    )
  )

## Balance table (villages) ----
# YET TO DO - PRIYADARSHI

# END of 05_balance.R ----
